import scrapy

from myspider.items import MovieItem


class DoubanSpider(scrapy.Spider):
    """Spider for the Douban movie Top 250 listing.

    Starts at the first listing page, yields one ``MovieItem`` per movie
    entry found on each page, and schedules every URL linked from the
    page's paginator.
    """

    name = 'douban'
    allowed_domains = ['movie.douban.com']
    start_urls = ['https://movie.douban.com/top250?start=0&filter=']

    def parse(self, response):
        """Extract movie items and pagination requests from a listing page.

        Fields are read per movie entry rather than from four page-wide
        lists zipped together: when one movie has no quote, page-wide
        lists end up with different lengths, which shifts every following
        quote onto the wrong movie and silently drops trailing movies.

        Args:
            response: The downloaded listing page.

        Yields:
            MovieItem: One item per movie with ``title``, ``link``,
                ``rating`` and ``quote`` set. ``quote`` is ``''`` when the
                entry has none; the other fields are ``None`` if missing.
            scrapy.Request: One request per paginator link.
        """
        # Response objects expose css() / xpath() / re() directly, so no
        # explicit Selector wrapper is needed.
        for header in response.css('div.hd'):
            # NOTE(review): assumes the div.hd and div.bd of one movie share
            # the same parent element -- confirm against the live markup.
            entry = header.xpath('..')

            item = MovieItem()
            item['title'] = header.css(
                'div.hd > a > span.title:nth-child(1)::text').extract_first()
            item['link'] = header.css(
                'div.hd > a::attr(href)').extract_first()
            item['rating'] = entry.css(
                'div.bd > div > span.rating_num::text').extract_first()
            # Not every movie carries a quote; keep the item instead of
            # letting the gap misalign the remaining entries.
            item['quote'] = entry.css(
                'div.bd > p.quote > span::text').extract_first(default='')
            yield item

        # Pagination hrefs may be relative, so resolve them against the
        # current page URL before scheduling them.
        for href in response.css('div.paginator > a::attr(href)').extract():
            yield scrapy.Request(url=response.urljoin(href))
